	use if inrange(age, 40, 55) & year==2017 using "${clean_data}/panel_physicians_clean.dta", clear

	gen sh_pftloss = pftloss/ptotinc	
	gen sh_w2inc = totw2comp/ptotinc
	gen sh_notw2inc = 1-sh_w2inc	
	
	gen general_surgery = cms_spec_code == "02"
	gen primary_care = spec_category_code == 1
	gen neurosurgery = cms_spec_code == "14"
	gen cardiology = cms_spec_code == "06"
	gen family_practice = cms_spec_code == "08"

	gen stateNYNJ = (stfips == "36" | stfips == "34")
	gen stateCA = stfips == "06"
	gen stateTX = stfips =="48"
	gen stateFL = stfips == "12"
	gen stateAZ = stfips=="04"
	
	keep id ///
	     totw2comp ptotinc aginc ///
	     pftloss with25Kbizinc sh_*  ///
	     einsize wkh received_1099ssa ///
	     primary_care general_surgery neurosurgery ///
	     cardiology age family_practice ///
	     female is_foreign married top5_school state* 
	
	isid id 
	save ${temp}/incpct00.dta, replace 
	
	*Income cutoffs 
	foreach incpct in 50 75 90 95 99 {
		preserve 
		gegen p`=`incpct'-1'95=pctile(ptotinc), p(`=`incpct'-1'.95)
		gegen p`incpct'=pctile(ptotinc), p(`incpct')
		gegen p`incpct'05=pctile(ptotinc), p(`incpct'.05)
		gen dp`=`incpct'-1'95p`incpct'05=(ptotinc>p`=`incpct'-1'95 & ptotinc<p`incpct'05)
		gegen count_cutoff_ur=sum(dp`=`incpct'-1'95p`incpct'05)
		gegen aux=mean(ptotinc) if dp`=`incpct'-1'95p`incpct'05==1
		gegen cutoff=max(aux)
		keep if ptotinc>p`incpct'
		drop p`=`incpct'-1'95 p`incpct'05 dp`=`incpct'-1'95p`incpct'05 p`incpct' aux
		local name=`=100-`incpct''		
		save ${temp}/incpct`incpct'.dta, replace 
		restore
	}
	
	foreach incpct in 00 50 75 90 95 99 {
		use ${temp}/incpct`incpct'.dta, replace 
		// Compute medians within income group
		foreach var in ptotinc sh_pftloss sh_w2inc sh_notw2inc {
			gegen p495=pctile(`var'), p(49.5)
			gegen p505=pctile(`var'), p(50.5)	
			gen dp495p505=(`var'>p495 & `var'<p505)
			gegen aux=mean(`var') if dp495p505==1
			gegen median_`var'=max(aux)
			gegen count_median`var'_ur=sum(dp495p505)
			drop p495 p505 dp495p505 aux
		}
		 gen obs=1
		 gcollapse (mean) mean_* = *  (sum) N_UR = obs, wildparse
		 drop mean_id mean_obs
		 gen sample="top100min`incpct'percent"
		 order sample
		 save ${temp}/collapsed_top100min`incpct'percent.dta, replace 
	}


	 use ${temp}/collapsed_top100min00percent.dta, clear
	 foreach incpct in 50 75 90 95 99 {		 
	 append using ${temp}/collapsed_top100min`incpct'percent.dta
	 }
	 
	 order N_UR, last
	 order sample mean_cutoff mean_count_cutoff_ur ///
		mean_median_ptotinc mean_count_medianptotinc_ur ///
		mean_ptotinc mean_median_sh_pftloss ///
		mean_count_mediansh_pftloss_ur ///
		mean_median_sh_w2inc mean_count_mediansh_w2inc_ur ///
		mean_median_sh_notw2inc mean_count_mediansh_notw2inc_ur
		
	foreach var of varlist mean_cutoff mean_count_cutoff_ur ///
		mean_median_ptotinc mean_count_medianptotinc_ur ///
		mean_median_sh_pftloss ///
		mean_count_mediansh_pftloss_ur ///
		mean_median_sh_w2inc mean_count_mediansh_w2inc_ur ///
		mean_median_sh_notw2inc mean_count_mediansh_notw2inc_ur {
		local newvarname=substr("`var'",6,.)
		rename `var' `newvarname'
		}
	replace sample = "top100percent" if sample=="top100min00percent"
	replace sample = "top50percent" if sample=="top100min50percent"
	replace sample = "top25percent" if sample=="top100min75percent"
	replace sample = "top10percent" if sample=="top100min90percent"
	replace sample = "top5percent" if sample=="top100min95percent"
	replace sample = "top1percent" if sample=="top100min99percent"
	
	format mean_wkh %9.0g
	ds N_UR *ur sample, not
	drbest_docinc `r(varlist)'
	drbcount N_UR, g(N)
	order sample N
	export delimited using "${mypath}/intermediate_csv/desc04-top_earners.csv", replace dataf delim(tab)  
